#!/usr/bin/env python3
"""把指定目錄下的原始資料輸出為靜態站台"""
import json
import logging
import os
import shutil
import urllib.request as ur
from collections import UserDict, namedtuple
from copy import deepcopy
from datetime import datetime
from functools import partial

import jinja2
import jinja2.meta

from . import fchecksum, fcopy, fwrite, is_relative_to
from .syntaxparser import Parser

# Configure the root logger at import time: INFO level, "LEVEL - message" lines.
logging.basicConfig(level=logging.INFO, format='%(levelname)s - %(message)s')
# Module-level logger named after this module.
log = logging.getLogger(__name__)


# One row of the path mapping table. Every path field defaults to None and
# `exists` defaults to False, so a bare PathMapEntry() stands for "no mapping".
PathMapEntry = namedtuple(
    'PathMapEntry',
    'src src_root src_subpath dst dst_root dst_subpath exists',
    defaults=(None,) * 6 + (False,),
)


class PathMap(UserDict):
    """Lookup table relating source paths to output paths.

    Every entry is stored under up to four keys of the form
    ``(kind, value)`` where kind is 'src', 'src_subpath', 'dst' or
    'dst_subpath', so an entry can be found from any of those paths.
    """

    def __init__(self, src_root, dst_root):
        super().__init__()
        self.src_root = src_root
        self.dst_root = dst_root

    def add(self, src=None, dst=None, exists=True, quiet=False):
        """Register a new source/output path pair.

        Nothing is stored when any of the entry's paths is already mapped;
        a warning is logged in that case unless `quiet` is true.

        Args:
            src (str): source file path, or None
            dst (str): output file path, or None
            exists (bool): value stored in the entry's `exists` field
            quiet (bool): suppress the duplicate-mapping warning
        """
        src_subpath = dst_subpath = None

        if src is not None:
            src = os.path.abspath(src)
            src_subpath = os.path.relpath(src, self.src_root)

        if dst is not None:
            dst = os.path.abspath(dst)
            dst_subpath = os.path.relpath(dst, self.dst_root)

        entry = PathMapEntry(
            src, self.src_root, src_subpath,
            dst, self.dst_root, dst_subpath,
            exists,
        )

        # Keys under which this entry would be stored (unset paths are skipped).
        keys = [
            (kind, getattr(entry, kind))
            for kind in ('src', 'src_subpath', 'dst', 'dst_subpath')
            if getattr(entry, kind) is not None
        ]

        # Refuse the whole entry as soon as one key is already taken.
        for key in keys:
            if key not in self.data:
                continue
            if not quiet:
                type_, value = key
                map_old = self.data[key]
                log.warning(f'Duplicated mapping of "{value}" ({type_}) with: {map_old}')
            return

        for key in keys:
            self.data[key] = entry

    def get(self, path, type='src'):
        """Look up an entry by path and path kind.

        Args:
            path (str): path to look up
            type (str): kind of path
                - 'src': source file path
                - 'src_subpath': source file sub-path
                - 'dst': output file path
                - 'dst_subpath': output file sub-path

        Returns:
            PathMapEntry: the matching entry, or an all-default entry when
            nothing is mapped for the given path.
        """
        # Full paths are stored in absolute form; sub-paths are used as given.
        lookup = os.path.abspath(path) if type in ('src', 'dst') else path

        try:
            return self.data[(type, lookup)]
        except KeyError:
            return PathMapEntry()


class TemplateHandler():
    """Template controller: loads Jinja2 templates and renders pages with them.

    Attributes:
        root: template directory handed to the file system loader
        ssg: the generator this handler works for (settings, path map, metadata)
        env (jinja2.Environment): environment used to load and parse templates
        files (list): normalized path of every template the loader lists
    """

    def __init__(self, root, ssg):
        self.root = root
        self.ssg = ssg
        self.env = jinja2.Environment(
            loader=jinja2.FileSystemLoader(root),
            autoescape=jinja2.select_autoescape(),
        )
        # JSON dumping policy: keep key order and do not escape non-ASCII text.
        self.env.policies['json.dumps_kwargs'] = {'sort_keys': False, 'ensure_ascii': False}
        self.files = [os.path.normpath(os.path.join(root, f)) for f in self.env.list_templates()]

    def apply(self, fdst, body, info):
        """Render `body` with the template chosen for this page.

        The template is named after the page's 'data-type' metadata; when
        that is missing/empty or no such template exists, the default
        template (asset 'template_default', else 'base') is used.

        Args:
            fdst (str): output file the page is rendered for
            body (str): parsed page content
            info (dict): page metadata

        Returns:
            str: the rendered page
        """
        tpl_name = info.get('data-type') or None
        tpl_ext = self.ssg.assets.get('template_ext') or self.ssg.output_type

        # Explicit branch instead of `assert`: assertions are stripped under
        # `python -O`, which would make the lookup try "None.<ext>".
        template = None
        if tpl_name is not None:
            try:
                template = self.env.get_template(f'{tpl_name}.{tpl_ext}')
            except jinja2.exceptions.TemplateNotFound:
                template = None

        if template is None:
            tpl_default = self.ssg.assets.get('template_default', 'base')
            template = self.env.get_template(f'{tpl_default}.{tpl_ext}')

        return template.render(
            info=info,
            assets=self.ssg.assets,
            pathinfo=self.ssg.map.get(fdst, 'dst'),
            body=body,
            ur=ur,
            get_url=partial(self.get_url, fdst=fdst),
            get_breadcrumbs=partial(self.get_breadcrumbs, fdst=fdst),
        )

    def get_referenced_templates(self, tpl):
        """Recursively collect every template referenced by `tpl` (itself included).

        Returns:
            list: template names in discovery order, starting with `tpl`
        """
        rv = {}
        self._get_referenced_templates(tpl, rv)
        return list(rv)

    def _get_referenced_templates(self, tpl, dict_):
        """Record `tpl` in `dict_` and recurse into templates it references."""
        dict_[tpl] = True
        src, _, _ = self.env.loader.get_source(self.env, tpl)
        ast = self.env.parse(src)
        for subtpl in jinja2.meta.find_referenced_templates(ast):
            # Already visited: skip, which also stops reference cycles.
            if subtpl in dict_:
                continue
            self._get_referenced_templates(subtpl, dict_)

    def get_url(self, path, fdst):
        """Return the relative link URL from `fdst` to `path`."""
        return self.ssg.url_for(path, fdst)

    def get_breadcrumbs(self, fdst):
        """Generate the breadcrumb navigation chain for `fdst`.

        Returns:
            generator: yields (title, url|None), from the top level down.
            The title is the page's 'title' metadata or, failing that, the
            path part; the url is None when the mapped entry does not exist.
        """
        ddst = os.path.dirname(fdst)
        for part, subpath in self.ssg.breadcrumbs_for(fdst):
            title = self.ssg.get_meta(subpath, 'title') or part
            pathinfo = self.ssg.map.get(subpath, 'dst_subpath')
            if pathinfo.exists:
                url = ur.pathname2url(os.path.relpath(pathinfo.dst, ddst))
                yield title, url
            else:
                yield title, None


class CacheHandler(UserDict):
    """Cache controller: remembers the mtime, size and hash of source files.

    Attributes:
        root (str): absolute directory that cache keys are relative to
        file (str): path of the cache file
        updated (bool): whether the cache has been changed and needs saving
        updated_files (dict): result of `update_file_stat` for each file
            checked so far, keyed by path relative to `root`
    """

    DEFAULT_DATA = {
        'schema': 2,
        'src': {},
    }

    def __init__(self, file, root=''):
        """Load cache from the disk.

        - Start from an empty cache if the file does not exist.
        - Start from an empty cache (with a warning for unparsable content)
          if the file is not a usable cache, since the cache only holds
          derived data that can be rebuilt.
        - Migrate a schema 1 cache to schema 2 and mark it as updated.

        Args:
            file (str): path of the cache file
            root (str): directory that cache keys are relative to
        """
        super().__init__()

        self.root = os.path.abspath(root)
        self.file = file
        self.updated = False
        self.updated_files = {}

        # attempt to load from file
        try:
            fh = open(file, 'r', encoding='UTF-8')
        except FileNotFoundError:
            self.data = deepcopy(self.DEFAULT_DATA)
            return

        with fh:
            try:
                data = json.load(fh)
            except ValueError as exc:
                # JSONDecodeError and UnicodeDecodeError are both ValueError.
                logging.getLogger(__name__).warning(
                    'Ignoring unreadable cache file "%s": %s', file, exc)
                self.data = deepcopy(self.DEFAULT_DATA)
                return

        if not self._is_usable(data):
            self.data = deepcopy(self.DEFAULT_DATA)
            return

        # migrate from old version (if applicable)
        if data['schema'] == 1:
            data = self._upgrade_cache_v1(data)
            self.updated = True

        self.data = data

    @staticmethod
    def _is_usable(data):
        """Tell whether loaded JSON `data` has the structure of a known cache."""
        if not isinstance(data, dict):
            return False

        schema = data.get('schema', 0)
        if not isinstance(schema, (int, float)) or not 0 < schema <= 2:
            return False

        # Schema 1 keeps the per-file records under 'assets', later ones under 'src'.
        payload_key = 'assets' if schema == 1 else 'src'
        return isinstance(data.get(payload_key), dict)

    def save(self):
        """Save cache to the disk (skipped when nothing has changed)."""
        if not self.updated:
            return

        with open(self.file, 'w', encoding='UTF-8') as fh:
            json.dump(self.data, fh, ensure_ascii=False)

    def update_file_stat(self, file):
        """Update status for the file.

        The result is remembered per file, so repeated calls for the same
        file return the first answer without touching the disk again.

        Returns:
            bool: file stat has been changed (content hash differs from the
            cached one, or a cached file no longer exists)
        """
        subpath = os.path.relpath(file, self.root)

        updated = self.updated_files.get(subpath, None)

        if updated is not None:
            return updated

        updated = False

        try:
            stat = os.stat(file)
        except OSError:
            # File is gone: drop its record; it counts as changed only if
            # it was known before.
            try:
                del self.data['src'][subpath]
            except KeyError:
                pass
            else:
                updated = True
                self.updated = True
        else:
            info = self.data['src'].setdefault(subpath, {})
            mtime = stat.st_mtime
            size = stat.st_size
            # Any mtime difference (not only a newer one) triggers a re-hash,
            # so a file replaced by an older copy of equal size is noticed.
            if mtime != info.get('mtime', -1) or size != info.get('size', -1):
                info['mtime'] = mtime
                info['size'] = size
                self.updated = True

                # Only a different content hash counts as a real change.
                hash_ = fchecksum(file)
                if hash_ != info.get('hash', ''):
                    info['hash'] = hash_
                    updated = True

        self.updated_files[subpath] = updated
        return updated

    @classmethod
    def _upgrade_cache_v1(cls, data):
        """Convert schema 1 cache data (records under 'assets') to schema 2."""
        return {
            'schema': 2,
            'src': data['assets'],
        }


class StaticSiteGenerator():
    """Static site generator.

    Attributes:
        force (bool): regenerate pages whether or not anything changed
        map (PathMap): path mapping table
        meta (dict): metadata of each file, keyed by path relative to public/
        generated (dict): whether each file has been generated, keyed by path
            relative to public/
    """

    def __init__(self, root='', conf=None):
        """Resolve settings and working paths.

        Args:
            root (str): data directory that all other paths are resolved against
            conf (dict): configuration; missing keys fall back to defaults
        """
        conf = conf if conf is not None else {}
        self.index = conf.get('index_filename', 'index')
        self.output_type = conf.get('output_format', 'html')
        self.theme = conf.get('theme', 'default')
        # Work on a copy so that merging the theme-specific assets does not
        # modify the caller's conf['assets'].
        self.assets = dict(conf.get('assets', {}))
        self.assets.update(conf.get('theme_assets', {}).get(self.theme, {}))

        self.dir_data = os.path.abspath(root)
        self.dir_source = os.path.normpath(os.path.join(self.dir_data, conf.get('source_dir', 'pages')))
        self.dir_public = os.path.normpath(os.path.join(self.dir_data, conf.get('public_dir', 'public')))
        self.dir_theme = os.path.normpath(os.path.join(self.dir_data, conf.get('themes_dir', 'themes'), self.theme))
        self.dir_static = os.path.join(self.dir_theme, 'static')
        self.dir_template = os.path.join(self.dir_theme, 'template', self.output_type)
        self.file_cache = os.path.normpath(os.path.join(self.dir_data, conf.get('cache_file', 'cache.json')))

    def clean(self):
        """Remove the generated static site files and the cache file."""
        log.info(f'Removing: {self.dir_public} ...')
        try:
            shutil.rmtree(self.dir_public)
        except FileNotFoundError:
            pass

        log.info(f'Removing: {self.file_cache} ...')
        try:
            os.remove(self.file_cache)
        except FileNotFoundError:
            pass

    def run(self, force=False):
        """Generate the static site files.

        - Each run records the state of the source files in the cache file.
          The next run compares against it to decide which related files
          must be regenerated. (The cache file is only written at the final
          stage, so an aborted or failed run does not update it and the
          affected files are regenerated next time.)
        - For performance, output files are not inspected. If a source is
          unchanged, its output is not regenerated even if the output was
          modified. If outputs were changed unexpectedly, call clean() and
          then run() again.

        Args:
            force (bool): when true, regenerate all pages. (Mainly for when
                the generator or configuration changed enough to alter the
                output although no template or source file changed.)
        """
        # initialize per-run state
        starttime = datetime.now()
        self.force = force
        self.map = PathMap(src_root=self.dir_data, dst_root=self.dir_public)
        self.meta = {}
        self.generated = {}

        # create the output directory
        # if public/ is a file, let the error propagate and abort
        os.makedirs(self.dir_public, exist_ok=True)

        # generate the output
        self.parser = Parser(
            char_subst_table=self.assets.get('char_subst_table'),
            renderer_filter=self.assets.get('renderer_filter'),
        )
        self.templater = TemplateHandler(self.dir_template, self)
        self.cache = CacheHandler(self.file_cache, self.dir_data)

        self.map_src_and_dst()
        self.copy_public()
        self.generate_html()
        self.remove_stale_files()
        self.remove_stale_cache()
        self.cache.save()

        # done
        generated = sum(1 for v in self.generated.values() if v)
        endtime = datetime.now()
        log.info(f'{generated} file(s) generated in {endtime - starttime}')

    def _dst_for_source(self, fsrc_subpath):
        """Return the output path in public/ for a file under source/.

        A file whose extension has a lexer becomes a page with the output
        type as its extension; any other file keeps its sub-path.
        """
        stem, ext = os.path.splitext(fsrc_subpath)
        if Parser.get_lexer(ext[1:]):
            fdst_subpath = stem + '.' + self.output_type
        else:
            fdst_subpath = fsrc_subpath
        return os.path.join(self.dir_public, fdst_subpath)

    def map_src_and_dst(self):
        """Build the mapping table between source files and output files."""
        log.info('Generating map...')

        # static/*
        fsrc_root = self.dir_static
        for root, _subdirs, subfiles in os.walk(fsrc_root):
            for subfile in subfiles:
                fsrc = os.path.join(root, subfile)
                fsrc_subpath = os.path.relpath(fsrc, fsrc_root)
                fdst = os.path.join(self.dir_public, fsrc_subpath)
                self.map.add(src=fsrc, dst=fdst)

        # source/*
        fsrc_root = self.dir_source
        for root, _subdirs, subfiles in os.walk(fsrc_root):
            for subfile in subfiles:
                fsrc = os.path.join(root, subfile)
                fsrc_subpath = os.path.relpath(fsrc, fsrc_root)
                self.map.add(src=fsrc, dst=self._dst_for_source(fsrc_subpath))

        # cache
        # Register a non-exist map entry for a file to be removed
        # (in cache but not in static/* or source/*).
        # Files still on disk are already mapped above, so add() silently
        # skips them as duplicates.
        for fsrc_root in (self.dir_static, self.dir_source):
            for subpath in self.cache['src']:
                fsrc = os.path.join(self.dir_data, subpath)

                if not is_relative_to(fsrc, fsrc_root):
                    continue

                fsrc_subpath = os.path.relpath(fsrc, fsrc_root)
                if fsrc_root == self.dir_source:
                    # Use the same extension mapping as for live sources so a
                    # removed page source can be found by its output sub-path.
                    fdst = self._dst_for_source(fsrc_subpath)
                else:
                    fdst = os.path.join(self.dir_public, fsrc_subpath)
                self.map.add(src=fsrc, dst=fdst, exists=False, quiet=True)

    def copy_public(self):
        """Copy static/* to public/*."""
        for root, _subdirs, subfiles in os.walk(self.dir_static):
            for subfile in subfiles:
                fsrc = os.path.join(root, subfile)

                pathinfo = self.map.get(fsrc, 'src')
                if not pathinfo.exists:
                    continue

                fdst = pathinfo.dst

                if self.check_update(fsrc, fdst):
                    log.info(f'Copying to: {fdst}')
                    fcopy(fsrc, fdst)

    def generate_html(self):
        """Generate public/* from every file under source/*."""
        for root, _subdirs, subfiles in os.walk(self.dir_source):
            for subfile in subfiles:
                fsrc = os.path.join(root, subfile)
                self.generate_from_file(fsrc)

    def generate_from_file(self, fsrc):
        """Generate the given file into public/ through a template.

        - A file with no lexer for its extension is treated as not needing
          generation and is copied as is.

        @TODO: There is currently no effective way to tell which files must
               be updated when a template changes, because the templates a
               file uses are only known while generating it; requiring them
               beforehand to decide whether to generate would be circular.
               For now, a change to any template regenerates all pages.
        """
        (_, ext) = os.path.splitext(fsrc)
        type_ = ext[1:]
        is_page = Parser.get_lexer(type_)

        pathinfo = self.map.get(fsrc, 'src')
        if not pathinfo.exists:
            return

        fdst = pathinfo.dst

        if not is_page:
            if not self.check_update(fsrc, fdst):
                return

            log.info(f'Copying to: {fdst}')
            fcopy(fsrc, fdst)
            return

        # A page depends on every template and on the mapped source of each
        # breadcrumb level.
        sources = self.templater.files.copy()
        for _, subpath in self.breadcrumbs_for(fdst):
            source = self.map.get(subpath, 'dst_subpath').src
            if source:
                sources.append(source)

        if not self.check_update(sources, fdst, self.force):
            return

        log.info(f'Generating: {fdst}')
        output, info = self.parse_file(fsrc, input_type=type_, output_type=self.output_type)

        # keep the metadata, as other pages may refer to it later
        fdst_subpath = pathinfo.dst_subpath
        self.meta[fdst_subpath] = info

        output = self.templater.apply(fdst, output, info)

        fwrite(fdst, output)

    def parse_file(self, fsrc, input_type, output_type='html'):
        """Parse a file with the parser for `input_type` and return the result.

        Returns:
            tuple: (output, info); ('', {}) when there is no lexer for
            `input_type` or the file does not exist.
        """
        if not Parser.get_lexer(input_type):
            return '', {}

        try:
            fh = open(fsrc, 'rb')
        except FileNotFoundError:
            return '', {}

        with fh:
            output = self.parser.run(fh, input_type, output_type)
            return output, self.parser.info

    def remove_stale_files(self):
        """Remove stale files and empty directories from public/.

        A file is stale when its sub-path was not recorded in
        `self.generated` during this run. Directories are visited bottom-up
        and removed once empty.
        """
        for root, _subdirs, subfiles in os.walk(self.dir_public, topdown=False):
            for subfile in subfiles:
                fdst = os.path.join(root, subfile)
                fdst_subpath = os.path.relpath(fdst, self.dir_public)

                if fdst_subpath not in self.generated:
                    log.info(f'Removing stale file: {fdst}')
                    os.remove(fdst)

            # Close the directory handle before removing the directory.
            with os.scandir(root) as it:
                is_empty = next(it, None) is None

            if is_empty:
                log.info(f'Removing stale directory: {root}')
                os.rmdir(root)

    def remove_stale_cache(self):
        """Remove cache records of files that were not checked in this run."""
        stale_entries = [
            subpath for subpath in self.cache['src']
            if subpath not in self.cache.updated_files
        ]

        if stale_entries:
            self.cache.updated = True

        for subpath in stale_entries:
            del self.cache['src'][subpath]

    def url_for(self, target, ref):
        """Compute the relative URL from `ref` to `target`.

        Args:
            target (str): target file, as an absolute path or a path relative
                to public/
            ref (str): reference file or directory, as an absolute path or a
                path relative to CWD
        """
        # A reference that is not an existing directory is taken as a file.
        if not os.path.isdir(ref):
            ref = os.path.dirname(ref)
        target = os.path.join(self.dir_public, target)
        relpath = os.path.relpath(target, ref)
        return ur.pathname2url(relpath)

    def breadcrumbs_for(self, fdst):
        """Get the files related to the breadcrumbs of a file.

        Args:
            fdst (str): target file

        Yields:
            tuple: (level name, file path relative to public/), from the top
            level down. Each directory level maps to its index file; the
            target itself comes last unless it is an index file.
        """
        index_filename = self.index + '.' + self.output_type
        subpaths = []
        # The leading '.' provides the level for public/ itself.
        pathlist = os.path.join('.', os.path.relpath(fdst, self.dir_public)).split(os.sep)
        for idx, part in enumerate(pathlist):
            subpaths.append(part)
            subpath = os.path.join(*subpaths)

            if idx == len(pathlist) - 1:
                # An index file is already covered by its directory's level.
                if part == index_filename:
                    break
            else:
                subpath = os.path.join(subpath, index_filename)

            yield (part, os.path.normpath(subpath))

    def check_update(self, fsrc, fdst, force=False):
        """Check whether `fdst` must be (re)generated from `fsrc`.

        The answer is also recorded in `self.generated`. A directory found
        at `fdst` is removed so that the file can be written.

        Args:
            fsrc (str, list): source file(s) to compare
            fdst (str): file to generate
            force (bool): force generation

        Returns:
            bool: whether `fdst` needs to be generated
        """
        fdst_subpath = self.map.get(fdst, 'dst').dst_subpath

        need_generate = force

        if not os.path.exists(fdst):
            need_generate = True

        elif not os.path.isfile(fdst):
            log.info(f'Removing directory: {fdst}')
            shutil.rmtree(fdst)
            need_generate = True

        if isinstance(fsrc, str):
            fsrc = [fsrc]

        # Check every source (no short-circuit) so each one gets its cache
        # record refreshed.
        for f in fsrc:
            if self.cache.update_file_stat(f):
                need_generate = True

        self.generated[fdst_subpath] = need_generate
        return need_generate

    def get_meta(self, fdst_subpath, key, default=None):
        """Get a metadata value of the given file.

        A missing record may mean the file was not regenerated in this run.
        In that case the mapped source file is parsed again; if there is no
        source under source/ or parsing yields no value, `default` is used.
        """
        try:
            meta = self.meta[fdst_subpath]
        except KeyError:
            fsrc = self.map.get(fdst_subpath, 'dst_subpath').src

            if fsrc is not None and is_relative_to(fsrc, self.dir_source):
                _, ext = os.path.splitext(fsrc)
                type_ = ext[1:]
                _, info = self.parse_file(fsrc, input_type=type_, output_type=None)
                meta = self.meta[fdst_subpath] = info
            else:
                meta = self.meta[fdst_subpath] = {}

        return meta.get(key, default)
